import os
import pickle
from collections import Counter

import pandas as pd


def _distinct_labels(values):
    """Return the distinct string labels found in *values*.

    Every value is converted with ``str``. Labels that contain '无' are
    discarded unless the label is exactly '无'. When '无' itself is present
    the result is the two-element list ``['无', [other labels...]]``;
    otherwise it is the flat list of labels. First-seen order is preserved.
    """
    # De-duplicate on the string form: iterating a float column yields a new
    # NaN object per row, and NaN != NaN, so de-duplicating on the raw values
    # would keep one 'nan' entry per missing cell.
    labels = [
        label
        for label in dict.fromkeys(str(value) for value in values)
        if label == '无' or '无' not in label
    ]
    if '无' in labels:
        return ['无', [label for label in labels if label != '无']]
    return labels


def data_save_pkl(path, sheet_name, *, output_dir='./data/pickle',
                  trailing_columns=5):
    """Pickle the distinct values of each column of an Excel worksheet.

    The worksheet is read with ``pandas.read_excel``; its last
    ``trailing_columns`` columns are ignored. For every remaining column the
    distinct values (as strings) are collected, and the resulting
    ``{column name: labels}`` dict is written to
    ``<output_dir>/<sheet_name>.pickle``.

    :param path: path of the Excel workbook
    :param sheet_name: name of the worksheet to read; also used as the
        stem of the output file name
    :param output_dir: directory that receives the pickle file; it is
        created when missing
    :param trailing_columns: number of right-most columns to skip
    :return: None
    """
    data = pd.read_excel(path, sheet_name=sheet_name)

    # Drop the trailing columns with a positional slice instead of parsing
    # the workbook a second time; clamp at 0 so a narrow sheet yields no
    # columns rather than a negative slice bound.
    keep = max(data.shape[1] - trailing_columns, 0)
    data = data.iloc[:, :keep]

    dicts = {column: _distinct_labels(series) for column, series in data.items()}

    # Make sure the destination exists before opening the file for writing.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, f'{sheet_name}.pickle'), 'wb') as f:
        pickle.dump(dicts, f)


if __name__ == '__main__':
    # Raw string: the Windows path contains backslash sequences (\w, \c, \d,
    # \s) that are invalid escapes in a normal string literal and trigger a
    # SyntaxWarning on recent Python versions.
    path = r"E:\workspace\work\company\data_processing\data\sample.xlsx"
    # Export one pickle per worksheet.
    for sheet in ("基建", "生产", "营销"):
        data_save_pkl(path, sheet)
